*****************************************************************************************************
* Purpose: Clean HRS mortality data (interview and death dates) for analysis
* Written by: Hunter Green
* Last updated: 2024-12-28
* Stata version: 18.0
*****************************************************************************************************

* Machine switch: "T" selects David's folder paths below, any other value selects the else branch
global David "F"


*****************************************************************************************************
* Options, global macros
*****************************************************************************************************
* Session settings
version 18.0            // run this do-file under Stata 18.0 behavior
clear all               // start from an empty session
set maxvar 20000        // raise the variable limit to 20,000
set varabbrev off       // variable names must be typed in full
set more off            // do not stop output for --more--
pause on                // enable the -pause- command

* Folder macros, selected by the David toggle
* NOTE(review): both branches currently assign the same path, so the toggle has
* no effect -- the else branch presumably still needs the other user's path.
if "${David}" == "T" {
	* David: add your folder paths here
	* paper
	global paper_data "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HPACC/Aging projects/HRS diabetes mortality/Data/Raw"
}
else {
	* paper
	global paper_data "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HPACC/Aging projects/HRS diabetes mortality/Data/Raw"
}


*****************************************************************************************************
* Clean data
*****************************************************************************************************
* Load RAND HRS, keeping only: identifiers, Wave 10-15 in-sample flags,
* Wave 10-15 interview month and year, and year/month of death
use hhidpn hhid pn                                                  ///
    inw10 inw11 inw12 inw13 inw14 inw15                             ///
    r10iwendm r11iwendm r12iwendm r13iwendm r14iwendm r15iwendm     ///
    r10iwendy r11iwendy r12iwendy r13iwendy r14iwendy r15iwendy     ///
    radyear radmonth                                                ///
    using "${paper_data}/HRS/randhrs1992_2020v2.dta", clear


*** Wave 10
* Attach mpmelig from the HRS Wave 10 interview file, matching on hhid + pn
* (nogenerate: no _merge variable is created)
merge 1:1 hhid pn using "${paper_data}/HRS/hd10f6a.dta", keepusing(mpmelig) nogenerate

* Wave 10 interview date as a monthly (%tm) date; raw year/month no longer needed
generate w10_ym = ym(r10iwendy, r10iwendm)
format %tm w10_ym
drop r10iwendy r10iwendm


*** Wave 11
* Attach npmelig from the HRS Wave 11 interview file, matching on hhid + pn
* (nogenerate: no _merge variable is created)
merge 1:1 hhid pn using "${paper_data}/HRS/h12f3a.dta", keepusing(npmelig) nogenerate

* The separate merge keys are not used after this point (hhidpn is kept)
drop hhid pn

* Restrict to respondents in the Wave 10 and/or Wave 11 sample
keep if inlist(1, inw10, inw11)
* n=22,805

* Wave 11 interview date as a monthly (%tm) date
generate w11_ym = ym(r11iwendy, r11iwendm)
format %tm w11_ym
drop r11iwendy r11iwendm


*** Waves 12-15
* For each wave, build the interview date as a monthly (%tm) date from the
* interview year and month, then drop the raw year/month variables
forvalues w = 12/15 {
    generate w`w'_ym = ym(r`w'iwendy, r`w'iwendm)
    format %tm w`w'_ym
    drop r`w'iwendm r`w'iwendy
}


*** Year and month of biomarker collection
* Wave 11 interview date when npmelig is 1, 5, 6 or 7; otherwise the Wave 10
* interview date when mpmelig is 1, 5, 6 or 7; otherwise missing.
* (The Wave 11 rule takes precedence if both apply.)
generate biomarker_ym = cond(inlist(npmelig,1,5,6,7), w11_ym, ///
                        cond(inlist(mpmelig,1,5,6,7), w10_ym, .))
format %tm biomarker_ym


*** Death
* "True" death status and time
* true_deceased is 1 when a year of death is recorded and missing otherwise
gen true_deceased = 1 if !mi(radyear)

gen deathyr = radyear
gen deathmo = radmonth
* Impute month as 6 if missing month of death but not year of death
replace deathmo = 6 if mi(radmonth) & !mi(radyear)

gen true_death_ym = ym(deathyr, deathmo)
format true_death_ym %tm
drop radyear radmonth

* "Study" death status and time
gen study_deceased = true_deceased
gen study_death_ym = true_death_ym
format study_death_ym %tm

* In study if died before 2020, otherwise censor.
* The cutoff is open-ended (any death year from 2020 onward) so that deaths
* recorded after 2021 are also treated as censored rather than as study deaths.
* The !mi() guard is required because Stata treats missing as larger than any number.
replace study_deceased = . if deathyr >= 2020 & !mi(deathyr)
replace study_death_ym = . if deathyr >= 2020 & !mi(deathyr)


*** Censor
* Indicator for end of study (December 2019)
gen end_study_ym = ym(2019, 12)
format end_study_ym %tm

* Year and month of censor
* 1. Died in 2020 or later: censored at end of study
gen censor_ym = end_study_ym if deathyr >= 2020 & !mi(deathyr)
* 2. No study death and has a Wave 15 interview date: censored at end of study
replace censor_ym = end_study_ym if !mi(w15_ym) & mi(study_death_ym) & mi(censor_ym)
* 3. No study death: censored at the latest available interview among Waves 14, 13, 12
replace censor_ym = w14_ym if !mi(w14_ym) & mi(w15_ym) & mi(study_death_ym) & mi(censor_ym)
replace censor_ym = w13_ym if !mi(w13_ym) & mi(w15_ym) & mi(w14_ym) & mi(study_death_ym) & mi(censor_ym)
replace censor_ym = w12_ym if !mi(w12_ym) & mi(w15_ym) & mi(w14_ym) & mi(w13_ym) & mi(study_death_ym) & mi(censor_ym)
* 4. Wave 11 is used as the censor date only when mpmelig is 1, 5, 6 or 7
*    (the same codes that assign the biomarker date to Wave 10)
replace censor_ym = w11_ym if !mi(w11_ym) & mi(w15_ym) & mi(w14_ym) & mi(w13_ym) & mi(w12_ym) & mi(study_death_ym) & mi(censor_ym) & inlist(mpmelig,1,5,6,7)
* Anyone matching none of the rules above keeps a missing censor_ym
format censor_ym %tm
drop deathyr deathmo end_study_ym


*** Order variables
* Identifier, in-sample flags, interview dates, biomarker date, then death/censor
* variables; any variable not listed stays after these
order hhidpn                                                    ///
      inw10 inw11 inw12 inw13 inw14 inw15                       ///
      w10_ym w11_ym w12_ym w13_ym w14_ym w15_ym                 ///
      biomarker_ym                                              ///
      true_deceased true_death_ym                               ///
      study_deceased study_death_ym                             ///
      censor_ym


*** Label variables
* Per-wave in-sample flags and interview dates (Waves 10-15)
forvalues w = 10/15 {
    label variable inw`w' "HRS in Wave `w' sample"
    label variable w`w'_ym "HRS Wave `w' interview year & month"
}

* Biomarker date
label variable biomarker_ym "HRS year & month of biomarker collection"

* Death and censoring
label variable true_deceased "R was deceased by HRS Wave 15 (True)"
label variable true_death_ym "HRS year & month of mortality (True)"
label variable study_deceased "R was deceased by December 2019 (Study)"
label variable study_death_ym "HRS year & month of mortality to December 2019 (Study)"
label variable censor_ym "HRS year & month of censor"


*** Save data
* Write the cleaned dates file to the paper_data folder, overwriting any existing copy
save "${paper_data}/hrs_dates.dta", replace

